import time
import csv
from urllib import request, parse
from bs4 import BeautifulSoup
from database_operation import operate_record
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor, wait, ALL_COMPLETED ,as_completed
from utils import array_util


def ys_request(url):
    """Fetch one list page, extract every shopping-center entry, and persist them.

    Each ``li.item`` contributes a ``{'name': ..., 'href': ...}`` dict taken
    from the anchor inside its ``h3.fl.l-name-h3`` heading; the whole page's
    entries are handed to ``batch_insert`` in one call.
    """
    # Throttle: pause 1s before each request so we don't hammer the site.
    time.sleep(1)
    with request.urlopen(url) as res:
        html = str(res.read(), encoding='utf8')
    soup = BeautifulSoup(html, 'html.parser')
    list_items = soup.find('ul', class_='l-list clearfix').find_all('li', class_='item')
    data_list = [
        {
            'name': li.find('h3', class_='fl l-name-h3').a.string,
            'href': li.find('h3', class_='fl l-name-h3').a['href'],
        }
        for li in list_items
    ]
    batch_insert(data_list)

'''
数据库表结构
create table ys_shopping_center
 (
 id          serial not null
 primary key,
 url         varchar(100) comment '请求URL',
 name    varchar(50) comment '购物中心名字',
 open_time    varchar(50) comment '开业时间',
 area    varchar(50) comment '商业面积',
 develop_name       varchar(50) comment '开发商',
 listed_company    varchar(10) comment '上市企业',
 city   varchar(50) comment '所在城市',
 address varchar(200) comment '项目地址',
 floor  varchar(50) comment '商业楼层',
 project_status        varchar(50) comment '项目状态',
 project_type       varchar(20) comment '项目类型',
 business_status      varchar(50) comment '招商状态'
);
'''


def export_csv():
    """Export all successfully scraped shopping centers to a GBK-encoded CSV.

    Queries every ``ys_shopping_center`` row with ``is_success=true`` and
    writes a header row plus one row per record to a fixed local path.
    Prints a notice and returns early when there is no data.
    """
    sql = "select t.name,t.open_time,t.area,t.develop_name,t.listed_company,t.city,t.address,t.floor," \
          "t.project_status,t.project_type,t.business_status from ys_shopping_center t where t.is_success=true "
    rows = operate_record.query_record_list(sql)
    if not rows:
        print('-------暂无数据')
        return
    headers = ['名称', '开业时间', '面积', '开发商', '是否上市', '城市', '地址', '楼层', '项目状态', '项目类型', '招商状态']
    file_path = '/Users/bizvane_xdl/execl/江苏/购物中心.csv'
    # newline='' is required by the csv module; without it every record is
    # followed by a spurious blank line on platforms that translate newlines.
    with open(file_path, 'w', encoding='gbk', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(rows)
    print('------>finished')


# Bulk-insert a list of scraped records into ys_shopping_center.
def batch_insert(data_list):
    """Insert every ``{'name': ..., 'href': ...}`` dict with one INSERT statement.

    Bug fix: the original guard ``if not data_list and len(data_list) > 0``
    could never be true, so an empty list produced the malformed SQL
    ``insert into ... values `` — now we return early instead.
    """
    if not data_list:
        return
    # Scraped values are untrusted input going into string-built SQL:
    # double any single quote so it cannot terminate the SQL literal.
    batch_insert_values = ','.join(
        "('{0}','{1}')".format(
            data['name'].replace("'", "''"),
            data['href'].replace("'", "''"),
        )
        for data in data_list
    )
    batch_insert_sql = 'insert into ys_shopping_center(name,url) values ' + batch_insert_values
    print(batch_insert_sql)
    operate_record.save_record(batch_insert_sql)


# Fetch the detail data for one record (developer, project info, etc.).
def get_detail_data(orgin):
    """Scrape one shopping center's detail page and update its DB row.

    ``orgin`` is an ``(id, url)`` pair taken from a ``ys_shopping_center``
    row (see get_shoppping_data). All scraped attributes are collected into
    a dict keyed by the page's own labels and passed to database_update.
    """
    url = orgin[1]
    id = orgin[0]
    # Throttle: sleep before the request (1s, despite the original
    # comment that claimed 2s).
    time.sleep(1)
    with request.urlopen(url) as res:
        res_str = str(res.read(), encoding='utf8')
    soup = BeautifulSoup(res_str, 'html.parser')
    # Opening status — taken from the page heading with id 'kaiyezhuangtai'.
    project_status = soup.find('h2', class_='d-inf-tit', id='kaiyezhuangtai').string
    # Leasing ("zhaoshang") status — heading with id 'Iszhaoshang'.
    business_status = soup.find('h2', class_='d-inf-tit', id='Iszhaoshang').string
    dict_jo = {'id': id, 'is_success': True, 'project_status': project_status, 'business_status': business_status}
    attrs = soup.find('ul', class_='d-inf-status').find_all('li')
    for attr in attrs:
        key = attr.find('span', class_='colora mr60').string
        # The attribute value lives in the sibling node right after the label span.
        value = attr.find('span').next_sibling.get_text()
        dict_jo[key] = value

    # NOTE(review): id='' matches a <ul> whose id attribute is empty —
    # looks intentional for this site's markup, but confirm.
    develops = soup.find('ul', class_='d-property-list clearfix', id='').find_all('li')
    for develop in develops:
        key = develop.find('span', class_='d-property-attr').string
        value = develop.find('span', class_='d-property-value').string
        dict_jo[key] = value
    database_update(dict_jo)


# Write one shopping center's scraped detail attributes back to the database.
def database_update(data):
    """Update the ``ys_shopping_center`` row identified by ``data['id']``.

    ``data`` maps the detail page's own Chinese labels (e.g. '开业时间',
    '商业建筑面积') plus 'project_status', 'business_status', 'is_success'
    and 'id' to the scraped values. Raises KeyError if a label is missing.
    """
    def esc(value):
        # Values come from a scraped page (untrusted input): double every
        # single quote so it cannot break out of the SQL string literal.
        return str(value).replace("'", "''")

    update_sql = (
        "update ys_shopping_center set open_time='{0}',area='{1}',develop_name='{2}',"
        "listed_company='{3}',city='{4}',address='{5}',floor='{6}',project_status='{7}',"
        "project_type='{8}',business_status='{9}',is_success='{10}' where id={11}"
    ).format(
        esc(data['开业时间']), esc(data['商业建筑面积']), esc(data['开发商：']),
        esc(data['上市企业：']), esc(data['所在城市']), esc(data['项目地址']),
        esc(data['商业楼层']), esc(data['project_status']), esc(data['项目类型']),
        esc(data['business_status']), esc(data['is_success']),
        int(data['id']),  # id originates from the DB; force int so it can't inject
    )
    print(update_sql)
    operate_record.save_record(update_sql)


# Scrape detail pages for every shopping center that is not processed yet.
def get_shoppping_data():
    """Process all ``is_success=false`` rows in batches of 10 with a thread pool.

    Rows are split into index ranges by ``array_util.list_split_by_size``
    (each group is a "start,end" string); every range gets its own pool with
    one worker per row. Returns None (with a notice) when there is no data.
    """
    sql = "select t.id,t.url from ys_shopping_center t where t.is_success=false "
    rows = operate_record.query_record_list(sql)
    if len(rows) == 0:
        print('-------暂无数据')
        return None
    group_list = array_util.list_split_by_size(len(rows), 10)
    if not group_list:
        print('-------暂无数据')
        return
    print(group_list)
    for group in group_list:
        # Each group is a "start,end" index string; slice the matching rows.
        bounds = group.split(",", 2)
        batch = rows[int(bounds[0]):int(bounds[1]) + 1]
        # Use the executor as a context manager: the original called
        # shutdown() after the result loop, so an exception re-raised by
        # task.result() leaked the pool. "with" shuts it down either way.
        with ThreadPoolExecutor(max_workers=len(batch), thread_name_prefix='ThreadPool') as executor:
            all_task = [executor.submit(get_detail_data, (orgin[0], orgin[1])) for orgin in batch]
            # Wait for every task; result() re-raises any worker exception.
            for task in as_completed(all_task):
                task.result()

    print('------>{ finished }')


# Scrape the paginated list pages for the configured region.
def list_data(first_page=1, last_page=13):
    """Request every list page from ``first_page`` to ``last_page`` inclusive.

    Generalized from the original hard-coded ``range(1, 14)``; the defaults
    reproduce the original behavior exactly, so existing callers are unchanged.
    """
    url = 'http://bizsearch.winshangdata.com/xiangmu/s310-c0-t2019-r0-g0-x0-d0-z0-n0-m0-l0-q0-b0-y0-pn{0}.html'
    for page in range(first_page, last_page + 1):
        ys_request(url.format(page))
    print('-------->finished')


def main():
    """Run one stage of the pipeline; earlier stages are kept commented out."""
    # Stage 1: scrape the paginated list pages.
    # list_data()
    # Stage 2: scrape each item's detail page.
    # get_shoppping_data()
    # Stage 3: dump the collected results to a CSV file.
    export_csv()


# Script entry point: run the pipeline only when executed directly.
if __name__ == '__main__':
    main()